home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Cream of the Crop 1
/
Cream of the Crop 1.iso
/
PROGRAM
/
DDJ0192.ARJ
/
DSP.ASC
< prev
next >
Wrap
Text File
|
1991-11-21
|
26KB
|
687 lines
_PARALLEL DSP FOR DESIGNING ADAPTIVE FILTERS_
by Daniel Chen
[LISTING ONE]
/******* PSEUDO C CODE FOR CASCADE ADAPTIVE FILTER #1 *******/
/* Initialization */
xptr = &x[0];
wptr = &w[0];
for (i=0;i<N1;i++){
*xptr++ = 0.0;
*wptr++ = 0.0;
}
/* N1-1
* Compute y1 = SUM w[i] * x[i]
* i=0
*/
xptr = &x[0];
wptr = &w[0];
input(x); /* input x from A/D converter */
*xptr = x;
input (d); /* input d from A/D converter */
for (i=0;i<N1;i++)
y1 += *xptr++ * *wptr++;
/* Compute y = y1 + y2 + y3 + y4 */
receive(y2,y3,y4); /* receive y2, y3, y4 form processor 2, 3, 4 */
y = y1 + y2 + y3 + y4;
/* Compute error signal e */
e = d - y;
output(y); /* output y to D/A converter */
pass(e); /* pass e to processor 2, 3, 4 */
/* Update filter weights w[] */
xptr = &x[N1-1];
wptr = &w[N1-1];
pass (*xptr); /* pass x(n-N1) to processor #2 */
for (i=N1;i>0;i--){
*wptr-- += mu * e *xptr--;
*(xptr+1) = *xptr; /* delayed tap is implemented in circular buffer */
}
[LISTING TWO]
/******* PSEUDO C CODE FOR CASCADE ADAPTIVE FILTER #2 *******/
/* Initialization */
xptr = &x[0];
wptr = &w[0];
for (i=0;i<N2;i++){
*xptr++ = 0.0;
*wptr++ = 0.0;
}
/* N2-1
* Compute y2 = SUM w[i] * x[i]
* i=0
*/
xptr = &x[0];
wptr = &w[0];
receive(x); /* receive x(n-N1) from processor #1 */
*xptr = x;
for (i=0;i<N2;i++)
y2 += *xptr++ * *wptr++;
/* pass y2 and receive e */
pass(y2); /* pass y2 to processor #1 */
receive(e); /* receive e(n) form processor #1 */
/* Update filter weights w[] */
xptr = &x[N2-1];
wptr = &w[N2-1];
pass (*xptr); /* pass x(n-N1-N2) to processor #3 */
for (i=N2;i>0;i--){
*wptr-- += mu * e *xptr--;
*(xptr+1) = *xptr; /* delayed tap is implemented in circular buffer */
}
[LISTING THREE]
/****** PSEUDO C CODE FOR CASCADE ADAPTIVE FILTER #3 ******/
/* Initialization */
xptr = &x[0];
wptr = &w[0];
for (i=0;i<N3;i++){
*xptr++ = 0.0;
*wptr++ = 0.0;
}
/* N3-1
* Compute y3 = SUM w[i] * x[i]
* i=0
*/
xptr = &x[0];
wptr = &w[0];
receive(x); /* receive x(n-N1-N2) from processor #2 */
*xptr = x;
for (i=0;i<N3;i++)
y3 += *xptr++ * *wptr++;
/* pass y3 and receive e */
pass(y3); /* pass y3 to processor #1 */
receive(e); /* receive e(n) form processor #1 */
/* Update filter weights w[] */
xptr = &x[N3-1];
wptr = &w[N3-1];
pass (*xptr); /* pass x(n-N1-N2-N3) to processor #4 */
for (i=N3;i>0;i--){
*wptr-- += mu * e *xptr--;
*(xptr+1) = *xptr; /* delayed tap is implemented
in circular buffer */
}
[LISTING FOUR]
/****** PSEUDO C CODE FOR CASCADE ADAPTIVE FILTER #4 ******/
/* Initialization */
xptr = &x[0];
wptr = &w[0];
for (i=0;i<N4;i++){
*xptr++ = 0.0;
*wptr++ = 0.0;
}
/* N4-1
* Compute y4 = SUM w[i] * x[i]
* i=0
*/
xptr = &x[0];
wptr = &w[0];
receive(x); /* receive x(n-N1-N2-N3) from processor #3 */
*xptr = x;
for (i=0;i<N4;i++)
y4 += *xptr++ * *wptr++;
/* pass y4 and receive e */
pass(y4); /* pass y4 to processor #1 */
receive(e); /* receive e(n) form processor #1 */
/* Update filter weights w[] */
xptr = &x[N4-1];
wptr = &w[N4-1];
for (i=N3;i>0;i--){
*wptr-- += mu * e *xptr--;
*(xptr+1) = *xptr; /* delayed tap is implemented
in circular buffer */
}
[LISTING FIVE]
**********************************************************************
* CONST.H - Constants shared by the cascade TMS320C40 adaptive filter
*           programs LMS1.ASM, LMS2.ASM, LMS3.ASM and LMS4.ASM.
*
* N1..N4 are not defined in this file; presumably they are to be
* replaced by (or defined as) the number of taps given to each
* processor before assembling - confirm.
*
* C40_a_b is the address program LMSa loads into an auxiliary register
* to talk to processor #b.  The LMSx programs load incoming data from
* that address, store outgoing data at address+1, and store 0 at
* address-1 to enable the port.
**********************************************************************
* --- filter partition and adaptation step ---
order1      .set    N1              ; filter order for #1 C40
order2      .set    N2              ; filter order for #2 C40
order3      .set    N3              ; filter order for #3 C40
order4      .set    N4              ; filter order for #4 C40
mu          .set    0.01            ; step size
* --- processor #1: sample I/O and links to #2, #3, #4 ---
io_port     .set    0100081h        ; data I/O comm port addr for d, x, & y
C40_1_2     .set    0100041h        ; comm port address from #1 to #2 C40
C40_1_3     .set    0100051h        ; comm port address from #1 to #3 C40
C40_1_4     .set    0100061h        ; comm port address from #1 to #4 C40
* --- processor #2: links to #1, #3, #4 ---
C40_2_1     .set    0100071h        ; comm port address from #2 to #1 C40
C40_2_3     .set    0100061h        ; comm port address from #2 to #3 C40
C40_2_4     .set    0100051h        ; comm port address from #2 to #4 C40
* --- processor #3: links to #1, #2, #4 ---
C40_3_1     .set    0100081h        ; comm port address from #3 to #1 C40
C40_3_2     .set    0100071h        ; comm port address from #3 to #2 C40
C40_3_4     .set    0100061h        ; comm port address from #3 to #4 C40
* --- processor #4: links to #1, #2, #3 ---
C40_4_1     .set    0100071h        ; comm port address from #4 to #1 C40
C40_4_2     .set    0100081h        ; comm port address from #4 to #2 C40
C40_4_3     .set    0100091h        ; comm port address from #4 to #3 C40
[LISTING SIX]
******************************************************************
* LMS1 : Cascade TMS320C40 adaptive filter #1 Using Transversal
* Structure and LMS Algorithm, Looped Code
* Configuration:
* d(n) --------------------------+
* |
* e(n) |+
* +-----<-----(SUM)
* | |-
* --------+-------- |
* x(n) ----|Adaptive Filter|-----+--------> y(n)
* -----------------
* +--------<-------+-------<--------+-------<--------+
* | |y2(n) |y3(n) |y4(n)
* y(n)<-+ | | | |
* | +----+----+ +----+----+ +----+----+ +----+----+
* +--|TMS320C40|x(n1) |TMS320C40|x(n2) |TMS320C40|x(n3) |TMS320C40|
* x(n)---->| |----->| |----->| |----->| |
* +->| # 1 | | # 2 | | # 3 | | # 4 |
* | +----+----+ +----+----+ +----+----+ +----+----+
* d(n)--+ | | | |
* e(n)| | | |
* +-------->-------+------->--------+------->--------+
* where n1 = n-N1, n2 = n-N1-N2, and n3 = n-N1-N2-N3
* Algorithm for processor #1:
* N1-1
* y1(n) = SUM w(k)*x(n-k) k=0,1,2,...,N1-1
* k=0
* y(n) = y1(n) + y2(n) + y3(n) + y4(n)
* e(n) = d(n) - y(n)
* w(k) = w(k) + u*e(n)*x(n-k) k=0,1,2,...,N1-1
* where filter order N = N1 + N2 + N3 + N4 and u is the step size mu,
**********************************************************************
; LMS1 program body: processor #1 of the four-C40 cascade LMS adaptive filter.
; For each sample it reads x(n) and d(n) through AR4, forms the partial sum
; y1(n) over its own order1 taps, adds the partial sums read through
; AR5/AR6/AR7, computes the error scaled by the step size, writes that value
; back to the three ports, writes y(n) to the I/O port, and updates w[].
;
; Register use in this listing:
;   AR0 = pointer into x[] (circular)    AR1 = pointer into w[] (circular)
;   AR4 = data I/O port pointer          AR5/AR6/AR7 = ports to #2/#3/#4 C40
;   R1  = current product                R2  = running sum, y(n), then new weight
;   R6  = x(n), later the sample handed on to #2
;   R7  = d(n), then u*e(n)
;   BK  = order1 (circular block size)   RC  = repeat count for the RPTBD loops
; NOTE(review): RC is loaded with order1-2 and order1-3, so the code appears
; to assume order1 >= 3 - confirm before assembling with a shorter filter.
.include "const.h" ; include the constant definition file
.sect "vector"
reset .word begin ; word in section "vector" holding the address of "begin"
; Initialize pointers and arrays
; xptr = &x[0];
; wptr = &w[0];
; for (i=0;i<N1;i++){
; *xptr++ = 0.0;
; *wptr++ = 0.0;
; }
.text
begin .set $
LDP @io_addr ; set data page
LDI 0,R2 ; R2 = 0
LDF 0.0,R1 ; R1 = 0.0
LDI @io_addr,AR4 ; set pointer for data I/O
LDI @C40addr2,AR5 ; set pointer for #2 C40 comm port
LDI @C40addr3,AR6 ; set pointer for #3 C40 comm port
LDI @C40addr4,AR7 ; set pointer for #4 C40 comm port
LDI @xn_addr,AR0 ; set pointer for x[]
LDI @wn_addr,AR1 ; set pointer for w[]
STI R2,*-AR5(1) ; enable #2 C40 comm port (store 0 one word below the port address)
STI R2,*-AR6(1) ; enable #3 C40 comm port
STI R2,*-AR7(1) ; enable #4 C40 comm port
STF R1,*+AR5(1) ; start #2 C40 by sending it an initial 0.0
RPTS order1-1 ; repeat the following parallel store pair order1 times
STF R1,*AR0++(1)% ; x[] = 0.
|| STF R1,*AR1++(1)% ; w[] = 0.
; NOTE(review): the circular (%) stores above execute before BK is loaded on
; the next line - confirm BK holds a suitable value here, or load it earlier.
LDI order1,BK ; set up circular buffer
input:
; Compute filter output y1(n)
; xptr = &x[0];
; wptr = &w[0];
; input(x); /* input x from A/D converter */
; input (d); /* input d from A/D converter */
; *xptr = x;
; for (i=0;i<N1;i++)
; y1 += *xptr++ * *wptr++;
LDI order1-2,RC ; repeat count for the "filter" block (runs RC+1 = order1-1 times)
RPTBD filter ; delayed block repeat: the next three instructions run once before the block
LDF *AR4,R6 ; input x(n)
LDF *AR4,R7 ; input d(n) (read from the same address as x(n), second in order)
|| STF R6,*AR0 ; insert x(n) to buffer
MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; R1 = first x*w product
|| SUBF3 R2,R2,R2 ; R2 = 0.0
filter MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; next product, in parallel with adding the previous one
|| ADDF3 R1,R2,R2 ; y1(n) = w[].x[]
ADDF R1,R2 ; include last result
; compute y(n) signals
; receive(y2,y3,y4); /* receive y2, y3, y4 from processor 2, 3, 4 */
; y = y1 + y2 + y3 + y4;
ADDF *AR5,R2 ; add y2(n)
ADDF *AR6,R2 ; add y3(n)
ADDF *AR7,R2 ; add y4(n)
; Compute error signal e(n)
; e = d - y;
; pass(e); /* pass e to processor 2, 3, 4 */
SUBF R2,R7 ; e(n) = d(n) - y(n)
MPYF @u,R7 ; R7 = err = e(n) * u
; Output y(n) signal and e(n)
; output(y); /* output y to D/A converter */
; pass(e); /* pass e to processor 2, 3, 4 */
; The value sent to the other processors is R7, i.e. the error already
; multiplied by the step size u.
STF R7,*+AR5(1) ; send out u*e(n) to #2 C40
|| STF R7,*+AR6(1) ; send out u*e(n) to #3 C40
STF R2,*+AR4(1) ; send out y(n)
|| STF R7,*+AR7(1) ; send out u*e(n) to #4 C40
; Update weights w(n)
; xptr = &x[N1-1];
; wptr = &w[N1-1];
; pass (*xptr); /* pass x(n-N1) to processor #2 */
; for (i=N1;i>0;i--){
; *wptr-- += mu * e * *xptr--;
; *(xptr+1) = *xptr; /* delayed tap is implemented
; in circular buffer */
; }
; The loop is software pipelined: each pass stores the weight computed on
; the previous pass while multiplying for the next one.  The first weight is
; started before the block and the last two are finished after it.
LDI order1-3,RC ; initialize repeat counter
RPTBD weight ; do i = 0, N1-3
MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n)
ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n)
NOP ; third instruction executed before the repeated block starts
MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n-i-1)
|| STF R2,*AR1++(1)% ; update wi(n+1)
weight ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n-i)
LDF *AR0,R6 ; R6 = sample AR0 now points at, to be handed on to #2
|| STF R2,*AR1++(1)% ; update wi(n+1)
BD input ; delay branch
; AR0 is not advanced below, so the next x(n) stored at "input" overwrites
; the sample that was just read into R6.
MPYF3 R7,*AR0,R1 ; R1 = err * x(n-N+1)
|| STF R6,*+AR5(1) ; shift x(n-N) to #2 C40
ADDF3 R1,*AR1,R2 ; R2 = wi(n-N+1) + err * x(n-N+1)
STF R2,*AR1++(1)% ; update last w
; Define constants
; NOTE(review): circular addressing normally requires "buffer" and "coeffs"
; to be suitably aligned by the linker - confirm in the link command file.
xn .usect "buffer",order1 ; x[] delay line, order1 words
wn .usect "coeffs",order1 ; w[] weights, order1 words
.data
io_addr .word io_port ; address of the data I/O port
C40addr2 .word C40_1_2 ; address of the port to #2 C40
C40addr3 .word C40_1_3 ; address of the port to #3 C40
C40addr4 .word C40_1_4 ; address of the port to #4 C40
xn_addr .word xn ; address of x[]
wn_addr .word wn ; address of w[]
u .float mu ; step size as a floating-point word
.end
[LISTING SEVEN]
******************************************************************
* LMS2 : Cascade TMS320C40 adaptive filter #2 Using Transversal
* Structure and LMS Algorithm, Looped Code
* Configuration:
* d(n) --------------------------+
* |
* e(n) |+
* +-----<-----(SUM)
* | |-
* --------+-------- |
* x(n) ----|Adaptive Filter|-----+--------> y(n)
* -----------------
* +--------<-------+-------<--------+-------<--------+
* | |y2(n) |y3(n) |y4(n)
* y(n)<-+ | | | |
* | +----+----+ +----+----+ +----+----+ +----+----+
* +--|TMS320C40|x(n1) |TMS320C40|x(n2) |TMS320C40|x(n3) |TMS320C40|
* x(n)---->| |----->| |----->| |----->| |
* +->| # 1 | | # 2 | | # 3 | | # 4 |
* | +----+----+ +----+----+ +----+----+ +----+----+
* d(n)--+ | | | |
* e(n)| | | |
* +-------->-------+------->--------+------->--------+
* where n1 = n-N1, n2 = n-N1-N2, and n3 = n-N1-N2-N3
* Algorithm for processor #2:
* N2-1
* y2(n) = SUM w(N1+k)*x(n-N1-k) k=0,1,2,...,N2-1
* k=0
* w(N1+k) = w(N1+k) + u*e(n)*x(n-N1-k) k=0,1,2,...,N2-1
* where filter order N = N1 + N2 + N3 + N4 and u is the step size mu.
**********************************************************************
; LMS2 program body: processor #2 of the four-C40 cascade LMS adaptive filter.
; For each sample it reads the delayed input from #1 through AR5, forms the
; partial sum y2(n) over its own order2 taps, writes y2(n) back to #1, reads
; the error value from #1, updates w[], and hands its oldest sample to #3.
;
; Register use in this listing:
;   AR0 = pointer into x[] (circular)    AR1 = pointer into w[] (circular)
;   AR5 = port to #1 C40                 AR6 = port to #3 C40
;   AR7 = port to #4 C40 (loaded and enabled, not used afterwards here)
;   R1  = current product                R2  = running sum, then new weight
;   R6  = incoming sample, later the sample handed on to #3
;   R7  = error value read from #1
;   BK  = order2 (circular block size)   RC  = repeat count for the RPTBD loops
; NOTE(review): RC is loaded with order2-2 and order2-3, so the code appears
; to assume order2 >= 3 - confirm before assembling with a shorter filter.
.include "const.h" ; include the constant definition file
.sect "vector"
reset .word begin ; word in section "vector" holding the address of "begin"
; Initialize pointers and arrays
; xptr = &x[0];
; wptr = &w[0];
; for (i=0;i<N2;i++){
; *xptr++ = 0.0;
; *wptr++ = 0.0;
; }
.text
begin .set $
LDP @C40addr1 ; set data page
LDI 0,R2 ; R2 = 0
LDF 0.0,R1 ; R1 = 0.0
LDI @C40addr1,AR5 ; set pointer for #1 C40 comm port
LDI @C40addr3,AR6 ; set pointer for #3 C40 comm port
LDI @C40addr4,AR7 ; set pointer for #4 C40 comm port
LDI @xn_addr,AR0 ; set pointer for x[]
LDI @wn_addr,AR1 ; set pointer for w[]
STI R2,*-AR6(1) ; enable #3 C40 comm port (store 0 one word below the port address)
STI R2,*-AR5(1) ; enable #1 C40 comm port
STI R2,*-AR7(1) ; enable #4 C40 comm port
STF R1,*+AR6(1) ; start #3 C40 by sending it an initial 0.0
RPTS order2-1 ; repeat the following parallel store pair order2 times
STF R1,*AR0++(1)% ; x[] = 0.
|| STF R1,*AR1++(1)% ; w[] = 0.
; NOTE(review): the circular (%) stores above execute before BK is loaded on
; the next line - confirm BK holds a suitable value here, or load it earlier.
LDI order2,BK ; set up circular buffer
input:
; Compute filter output y2(n)
; xptr = &x[0];
; wptr = &w[0];
; receive(x); /* receive x(n-N1) from processor #1 */
; *xptr = x;
; for (i=0;i<N2;i++)
; y2 += *xptr++ * *wptr++;
LDI order2-2,RC ; repeat count for the "filter" block (runs RC+1 = order2-1 times)
RPTBD filter ; delayed block repeat: the next three instructions run once before the block
LDF *AR5,R6 ; receive x(n-N1) from #1 C40
STF R6,*AR0 ; insert the received sample into the buffer
MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; R1 = first x*w product
|| SUBF3 R2,R2,R2 ; R2 = 0.0
filter MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; next product, in parallel with adding the previous one
|| ADDF3 R1,R2,R2 ; y2(n) = w[].x[]
ADDF R1,R2 ; include last result
; Output y2(n) signals
; pass(y2); /* pass y2 to processor #1 */
STF R2,*+AR5(1) ; send y2(n) to #1 C40
; Input error signal e(n)
; receive(e); /* receive e(n) from processor #1 */
; No multiply by the step size appears in this listing; the LMS1 listing
; sends its error already multiplied by u.
LDF *AR5,R7 ; load e(n) from #1 C40
; Update weights w(n)
; xptr = &x[N2-1];
; wptr = &w[N2-1];
; pass (*xptr); /* pass x(n-N1-N2) to processor #3 */
; for (i=N2;i>0;i--){
; *wptr-- += mu * e * *xptr--;
; *(xptr+1) = *xptr; /* delayed tap is implemented
; in circular buffer */
; }
;
; The loop is software pipelined: each pass stores the weight computed on
; the previous pass while multiplying for the next one.  The first weight is
; started before the block and the last two are finished after it.
LDI order2-3,RC ; initialize repeat counter
RPTBD weight ; do i = 0, N2-3
MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n)
ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n)
NOP ; third instruction executed before the repeated block starts
MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n-i-1)
|| STF R2,*AR1++(1)% ; update wi(n+1)
weight ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n-i)
LDF *AR0,R6 ; R6 = sample AR0 now points at, to be handed on to #3
|| STF R2,*AR1++(1)% ; update wi(n+1)
BD input ; delay branch
; AR0 is not advanced below, so the next sample stored at "input" overwrites
; the sample that was just read into R6.
MPYF3 R7,*AR0,R1 ; R1 = err * x(n-N+1)
|| STF R6,*+AR6(1) ; shift x(n-N) to #3 C40
ADDF3 R1,*AR1,R2 ; R2 = wi(n-N+1) + err * x(n-N+1)
STF R2,*AR1++(1)% ; update last w
; Define constants
; NOTE(review): circular addressing normally requires "buffer" and "coeffs"
; to be suitably aligned by the linker - confirm in the link command file.
xn .usect "buffer",order2 ; x[] delay line, order2 words
wn .usect "coeffs",order2 ; w[] weights, order2 words
.data
C40addr1 .word C40_2_1 ; address of the port to #1 C40
C40addr3 .word C40_2_3 ; address of the port to #3 C40
C40addr4 .word C40_2_4 ; address of the port to #4 C40
xn_addr .word xn ; address of x[]
wn_addr .word wn ; address of w[]
.end
[LISTING EIGHT]
******************************************************************
* LMS3 : Cascade TMS320C40 adaptive filter #3 Using Transversal
* Structure and LMS Algorithm, Looped Code
* Configuration:
* d(n) --------------------------+
* |
* e(n) |+
* +-----<-----(SUM)
* | |-
* --------+-------- |
* x(n) ----|Adaptive Filter|-----+--------> y(n)
* -----------------
* +--------<-------+-------<--------+-------<--------+
* | |y2(n) |y3(n) |y4(n)
* y(n)<-+ | | | |
* | +----+----+ +----+----+ +----+----+ +----+----+
* +--|TMS320C40|x(n1) |TMS320C40|x(n2) |TMS320C40|x(n3) |TMS320C40|
* x(n)---->| |----->| |----->| |----->| |
* +->| # 1 | | # 2 | | # 3 | | # 4 |
* | +----+----+ +----+----+ +----+----+ +----+----+
* d(n)--+ | | | |
* e(n)| | | |
* +-------->-------+------->--------+------->--------+
* where n1 = n-N1, n2 = n-N1-N2, and n3 = n-N1-N2-N3
* Algorithm for processor #3:
* N3-1
* y3(n) = SUM w(N1+N2+k)*x(n-N1-N2-k) k=0,1,2,...,N3-1
* k=0
* w(N1+N2+k) = w(N1+N2+k) + u*e(n)*x(n-N1-N2-k) k=0,1,2,...,N3-1
* where filter order N = N1 + N2 + N3 + N4 and u is the step size mu.
**********************************************************************
; LMS3 program body: processor #3 of the four-C40 cascade LMS adaptive filter.
; For each sample it reads the delayed input from #2 through AR6, forms the
; partial sum y3(n) over its own order3 taps, writes y3(n) to #1 through AR5,
; reads the error value from #1, updates w[], and hands its oldest sample
; to #4 through AR7.
;
; Register use in this listing:
;   AR0 = pointer into x[] (circular)    AR1 = pointer into w[] (circular)
;   AR5 = port to #1 C40                 AR6 = port to #2 C40
;   AR7 = port to #4 C40
;   R1  = current product                R2  = running sum, then new weight
;   R6  = incoming sample, later the sample handed on to #4
;   R7  = error value read from #1
;   BK  = order3 (circular block size)   RC  = repeat count for the RPTBD loops
; NOTE(review): RC is loaded with order3-2 and order3-3, so the code appears
; to assume order3 >= 3 - confirm before assembling with a shorter filter.
.include "const.h" ; include the constant definition file
.sect "vector"
reset .word begin ; word in section "vector" holding the address of "begin"
; Initialize pointers and arrays
; xptr = &x[0];
; wptr = &w[0];
; for (i=0;i<N3;i++){
; *xptr++ = 0.0;
; *wptr++ = 0.0;
; }
.text
begin .set $
LDP @C40addr1 ; set data page
LDI 0,R2 ; R2 = 0
LDF 0.0,R1 ; R1 = 0.0
LDI @C40addr1,AR5 ; set pointer for #1 C40 comm port
LDI @C40addr2,AR6 ; set pointer for #2 C40 comm port
LDI @C40addr4,AR7 ; set pointer for #4 C40 comm port
LDI @xn_addr,AR0 ; set pointer for x[]
LDI @wn_addr,AR1 ; set pointer for w[]
STI R2,*-AR7(1) ; enable #4 C40 comm port (store 0 one word below the port address)
STI R2,*-AR6(1) ; enable #2 C40 comm port
STI R2,*-AR5(1) ; enable #1 C40 comm port
STF R1,*+AR7(1) ; start #4 C40 by sending it an initial 0.0
RPTS order3-1 ; repeat the following parallel store pair order3 times
STF R1,*AR0++(1)% ; x[] = 0.
|| STF R1,*AR1++(1)% ; w[] = 0.
; NOTE(review): the circular (%) stores above execute before BK is loaded on
; the next line - confirm BK holds a suitable value here, or load it earlier.
LDI order3,BK ; set up circular buffer
input:
; Compute filter output y3(n)
; xptr = &x[0];
; wptr = &w[0];
; receive(x); /* receive x(n-N1-N2) from processor #2 */
; *xptr = x;
; for (i=0;i<N3;i++)
; y3 += *xptr++ * *wptr++;
LDI order3-2,RC ; repeat count for the "filter" block (runs RC+1 = order3-1 times)
RPTBD filter ; delayed block repeat: the next three instructions run once before the block
LDF *AR6,R6 ; receive x(n-N1-N2) from #2 C40
STF R6,*AR0 ; insert the received sample into the buffer
MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; R1 = first x*w product
|| SUBF3 R2,R2,R2 ; R2 = 0.0
filter MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; next product, in parallel with adding the previous one
|| ADDF3 R1,R2,R2 ; y3(n) = w[].x[]
ADDF R1,R2 ; include last result
; Output y3(n) signals
; pass(y3); /* pass y3 to processor #1 */
STF R2,*+AR5(1) ; send y3(n) to #1 C40
; Input error signal e(n)
; receive(e); /* receive e(n) from processor #1 */
; No multiply by the step size appears in this listing; the LMS1 listing
; sends its error already multiplied by u.
LDF *AR5,R7 ; load e(n) from #1 C40
; Update weights w(n)
; xptr = &x[N3-1];
; wptr = &w[N3-1];
; pass (*xptr); /* pass x(n-N1-N2-N3) to processor #4 */
; for (i=N3;i>0;i--){
; *wptr-- += mu * e * *xptr--;
; *(xptr+1) = *xptr; /* delayed tap is implemented
; in circular buffer */
; }
;
; The loop is software pipelined: each pass stores the weight computed on
; the previous pass while multiplying for the next one.  The first weight is
; started before the block and the last two are finished after it.
LDI order3-3,RC ; initialize repeat counter
RPTBD weight ; do i = 0, N3-3
MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n)
ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n)
NOP ; third instruction executed before the repeated block starts
MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n-i-1)
|| STF R2,*AR1++(1)% ; update wi(n+1)
weight ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n-i)
LDF *AR0,R6 ; R6 = sample AR0 now points at, to be handed on to #4
|| STF R2,*AR1++(1)% ; update wi(n+1)
BD input ; delay branch
; AR0 is not advanced below, so the next sample stored at "input" overwrites
; the sample that was just read into R6.
MPYF3 R7,*AR0,R1 ; R1 = err * x(n-N+1)
|| STF R6,*+AR7(1) ; shift x(n-N) to #4 C40
ADDF3 R1,*AR1,R2 ; R2 = wi(n-N+1) + err * x(n-N+1)
STF R2,*AR1++(1)% ; update last w
; Define constants
; NOTE(review): circular addressing normally requires "buffer" and "coeffs"
; to be suitably aligned by the linker - confirm in the link command file.
xn .usect "buffer",order3 ; x[] delay line, order3 words
wn .usect "coeffs",order3 ; w[] weights, order3 words
.data
C40addr1 .word C40_3_1 ; address of the port to #1 C40
C40addr2 .word C40_3_2 ; address of the port to #2 C40
C40addr4 .word C40_3_4 ; address of the port to #4 C40
xn_addr .word xn ; address of x[]
wn_addr .word wn ; address of w[]
.end
[LISTING NINE]
******************************************************************
* LMS4 : Cascade TMS320C40 adaptive filter #4 Using Transversal
* Structure and LMS Algorithm, Looped Code
* Configuration:
* d(n) --------------------------+
* |
* e(n) |+
* +-----<-----(SUM)
* | |-
* --------+-------- |
* x(n) ----|Adaptive Filter|-----+--------> y(n)
* -----------------
* +--------<-------+-------<--------+-------<--------+
* | |y2(n) |y3(n) |y4(n)
* y(n)<-+ | | | |
* | +----+----+ +----+----+ +----+----+ +----+----+
* +--|TMS320C40|x(n1) |TMS320C40|x(n2) |TMS320C40|x(n3) |TMS320C40|
* x(n)---->| |----->| |----->| |----->| |
* +->| # 1 | | # 2 | | # 3 | | # 4 |
* | +----+----+ +----+----+ +----+----+ +----+----+
* d(n)--+ | | | |
* e(n)| | | |
* +-------->-------+------->--------+------->--------+
* where n1 = n-N1, n2 = n-N1-N2, and n3 = n-N1-N2-N3
* Algorithm for processor #4:
* N4-1
* y4(n) = SUM w(N1+N2+N3+k)*x(n-N1-N2-N3-k) k=0,1,2,...,N4-1
* k=0
* w(N1+N2+N3+k) = w(N1+N2+N3+k) + u*e(n)*x(n-N1-N2-N3-k) k=0,1,2,...,N4-1
* where filter order N = N1 + N2 + N3 + N4 and u is the step size mu.
**********************************************************************
; LMS4 program body: processor #4 of the four-C40 cascade LMS adaptive filter.
; For each sample it reads the delayed input from #3 through AR7, forms the
; partial sum y4(n) over its own order4 taps, writes y4(n) to #1 through AR5,
; reads the error value from #1, and updates w[].  It is the last processor
; in the chain, so no sample is handed on.
;
; Register use in this listing:
;   AR0 = pointer into x[] (circular)    AR1 = pointer into w[] (circular)
;   AR5 = port to #1 C40                 AR7 = port to #3 C40
;   AR6 = port to #2 C40 (loaded and enabled, not used afterwards here)
;   R1  = current product                R2  = running sum, then new weight
;   R6  = incoming sample                R7  = error value read from #1
;   BK  = order4 (circular block size)   RC  = repeat count for the RPTBD loops
; NOTE(review): RC is loaded with order4-2 and order4-3, so the code appears
; to assume order4 >= 3 - confirm before assembling with a shorter filter.
.include "const.h" ; include the constant definition file
.sect "vector"
reset .word begin ; word in section "vector" holding the address of "begin"
; Initialize pointers and arrays
; xptr = &x[0];
; wptr = &w[0];
; for (i=0;i<N4;i++){
; *xptr++ = 0.0;
; *wptr++ = 0.0;
; }
.text
begin .set $
LDP @C40addr1 ; set data page
LDI 0,R2 ; R2 = 0
LDF 0.0,R1 ; R1 = 0.0
LDI @C40addr1,AR5 ; set pointer for #1 C40 comm port
LDI @C40addr2,AR6 ; set pointer for #2 C40 comm port
LDI @C40addr3,AR7 ; set pointer for #3 C40 comm port
LDI @xn_addr,AR0 ; set pointer for x[]
LDI @wn_addr,AR1 ; set pointer for w[]
STI R2,*-AR5(1) ; enable #1 C40 comm port (store 0 one word below the port address)
STI R2,*-AR6(1) ; enable #2 C40 comm port
STI R2,*-AR7(1) ; enable #3 C40 comm port
RPTS order4-1 ; repeat the following parallel store pair order4 times
STF R1,*AR0++(1)% ; x[] = 0.
|| STF R1,*AR1++(1)% ; w[] = 0.
; NOTE(review): the circular (%) stores above execute before BK is loaded on
; the next line - confirm BK holds a suitable value here, or load it earlier.
LDI order4,BK ; set up circular buffer
input:
; Compute filter output y4(n)
; xptr = &x[0];
; wptr = &w[0];
; receive(x); /* receive x(n-N1-N2-N3) from processor #3 */
; *xptr = x;
; for (i=0;i<N4;i++)
; y4 += *xptr++ * *wptr++;
LDI order4-2,RC ; repeat count for the "filter" block (runs RC+1 = order4-1 times)
RPTBD filter ; delayed block repeat: the next three instructions run once before the block
LDF *AR7,R6 ; receive x(n-N1-N2-N3) from #3 C40
STF R6,*AR0 ; insert the received sample into the buffer
MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; R1 = first x*w product
|| SUBF3 R2,R2,R2 ; R2 = 0.0
filter MPYF3 *AR0++(1)%,*AR1++(1)%,R1 ; next product, in parallel with adding the previous one
|| ADDF3 R1,R2,R2 ; y4(n) = w[].x[]
ADDF R1,R2 ; include last result
; Output y4(n) signals
; pass(y4); /* pass y4 to processor #1 */
STF R2,*+AR5(1) ; send y4(n) to #1 C40
; Input error signal e(n)
; receive(e); /* receive e(n) from processor #1 */
; No multiply by the step size appears in this listing; the LMS1 listing
; sends its error already multiplied by u.
LDF *AR5,R7 ; load e(n) from #1 C40
; Update weights w(n)
; xptr = &x[N4-1];
; wptr = &w[N4-1];
; for (i=N4;i>0;i--){
; *wptr-- += mu * e * *xptr--;
; *(xptr+1) = *xptr; /* delayed tap is implemented
; in circular buffer */
; }
; The loop is software pipelined: each pass stores the weight computed on
; the previous pass while multiplying for the next one.  The first weight is
; started before the block and the last two are finished after it.
LDI order4-3,RC ; initialize repeat counter
RPTBD weight ; do i = 0, N4-3
MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n)
ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n)
NOP ; third instruction executed before the repeated block starts
MPYF3 R7,*AR0++(1)%,R1 ; R1 = err * x(n-i-1)
|| STF R2,*AR1++(1)% ; update wi(n+1)
weight ADDF3 R1,*AR1,R2 ; R2 = wi(n) + err * x(n-i)
BD input ; delay branch
; AR0 is not advanced below, so the next sample stored at "input" overwrites
; the sample used in this last multiply.
MPYF3 R7,*AR0,R1 ; R1 = err * x(n-N+1)
|| STF R2,*AR1++(1)% ; update wi(n+1)
ADDF3 R1,*AR1,R2 ; R2 = wi(n-N+1) + err * x(n-N+1)
STF R2,*AR1++(1)% ; update last w
; Define constants
; NOTE(review): circular addressing normally requires "buffer" and "coeffs"
; to be suitably aligned by the linker - confirm in the link command file.
xn .usect "buffer",order4 ; x[] delay line, order4 words
wn .usect "coeffs",order4 ; w[] weights, order4 words
.data
C40addr1 .word C40_4_1 ; address of the port to #1 C40
C40addr2 .word C40_4_2 ; address of the port to #2 C40
C40addr3 .word C40_4_3 ; address of the port to #3 C40
xn_addr .word xn ; address of x[]
wn_addr .word wn ; address of w[]
.end